
def remove_matching_lines(urls_file, crawled_file):
    """Remove already-crawled entries from the pending URL list, in place.

    Each non-empty line of *crawled_file* (whitespace-stripped) is treated as
    a substring marker: every line of *urls_file* that contains any marker is
    dropped, and the surviving lines are written back to *urls_file*.

    Args:
        urls_file: Path to the pending-URL list; rewritten in place.
        crawled_file: Path to the record of already-crawled entries.
    """
    # Load and strip the crawled markers once, instead of re-stripping each
    # marker for every URL line inside the loop.
    # Skip blank lines: an empty string is a substring of every line, so a
    # single blank line in the crawled record would otherwise delete the
    # entire URL list (bug in the original implementation).
    with open(crawled_file, 'r', encoding='utf-8') as crawled:
        markers = [line.strip() for line in crawled if line.strip()]

    with open(urls_file, 'r', encoding='utf-8') as urls:
        url_lines = urls.readlines()

    # Keep only the lines that contain none of the crawled markers.
    filtered_lines = [
        line for line in url_lines
        if not any(marker in line for marker in markers)
    ]

    # Write the filtered content back over the original URL file.
    with open(urls_file, 'w', encoding='utf-8') as out:
        out.writelines(filtered_lines)

# Entry point: guard the hard-coded run so importing this module has no
# side effects; behavior when executed as a script is unchanged.
if __name__ == "__main__":
    remove_matching_lines('D:\\data\\toutiao.txt', 'D:\\data\\crawled.txt')